#load libraries
library(vcfR)
## Warning: package 'vcfR' was built under R version 3.5.2
## 
##    *****       ***   vcfR   ***       *****
##    This is vcfR 1.9.0 
##      browseVignettes('vcfR') # Documentation
##      citation('vcfR') # Citation
##    *****       *****      *****       *****
library(RADstackshelpR)
library(introgress)
## Loading required package: nnet
## Loading required package: genetics
## Warning: package 'genetics' was built under R version 3.5.2
## Loading required package: combinat
## 
## Attaching package: 'combinat'
## The following object is masked from 'package:utils':
## 
##     combn
## Loading required package: gdata
## gdata: read.xls support for 'XLS' (Excel 97-2004) files ENABLED.
## 
## gdata: read.xls support for 'XLSX' (Excel 2007+) files ENABLED.
## 
## Attaching package: 'gdata'
## The following object is masked from 'package:stats':
## 
##     nobs
## The following object is masked from 'package:utils':
## 
##     object.size
## The following object is masked from 'package:base':
## 
##     startsWith
## Loading required package: gtools
## Loading required package: MASS
## Loading required package: mvtnorm
## Warning: package 'mvtnorm' was built under R version 3.5.2
## 
## NOTE: THIS PACKAGE IS NOW OBSOLETE.
## 
##   The R-Genetics project has developed an set of enhanced genetics
##   packages to replace 'genetics'. Please visit the project homepage
##   at http://rgenetics.org for informtion.
## 
## 
## Attaching package: 'genetics'
## The following objects are masked from 'package:base':
## 
##     %in%, as.factor, order
## Loading required package: RColorBrewer
#load data
vcf<-read.vcfR("/Users/devder/Desktop/parrots/la.parrots/parrotsnpsunfiltered.vcf")
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 2
##   header_line: 3
##   variant count: 10870
##   column count: 39
## 
Meta line 2 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 10870
##   Character matrix gt cols: 39
##   skip: 0
##   nrows: 10870
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant: 10870
## All variants processed
#read in locality info for samples
locs<-read.table("~/Desktop/parrots/la.parrots/parrotsamples.txt", header=T, sep="\t")
colnames(locs)<-c("id","location","tiss")
head(vcf@fix)
##      CHROM      POS   ID      REF ALT QUAL FILTER INFO
## [1,] "uce-1572" "208" "SNP_1" "A" "G" "-1" NA     NA  
## [2,] "uce-1572" "444" "SNP_2" "G" "A" "-1" NA     NA  
## [3,] "uce-4474" "89"  "SNP_3" "G" "A" "-1" NA     NA  
## [4,] "uce-735"  "67"  "SNP_4" "C" "A" "-1" NA     NA  
## [5,] "uce-1371" "378" "SNP_5" "A" "G" "-1" NA     NA  
## [6,] "uce-5661" "481" "SNP_6" "G" "T" "-1" NA     NA
vcf@gt[1:5,1:5]
##      FORMAT AmafinB646 AmafinB714 AmafinMLZ12743 AmafinMLZ25447
## [1,] "GT"   "0/1"      "0/0"      NA             NA            
## [2,] "GT"   "0/0"      "0/0"      "0/0"          "0/0"         
## [3,] "GT"   "0/1"      "0/0"      "0/0"          "0/0"         
## [4,] "GT"   "0/1"      "0/0"      "0/0"          "0/0"         
## [5,] "GT"   "0/0"      "0/0"      NA             NA
#No info on depth of coverage or genotype quality in this vcf. What were our filtering parameters in gatk?

#check that sampling IDs in text file match sample IDs in the vcf
colnames(vcf@gt)[-1] == locs$id
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
#check out missing data
missing.by.snp(vcf)
## Picking joint bandwidth of 0.11

##    filt missingness snps.retained
## 1  0.30  0.24962043         10714
## 2  0.50  0.24733446         10642
## 3  0.60  0.22119187          9425
## 4  0.65  0.18170055          7574
## 5  0.70  0.15950759          6607
## 6  0.75  0.10862955          4670
## 7  0.80  0.08567444          3944
## 8  0.85  0.05239711          2948
## 9  0.90  0.03645960          2463
## 10 0.95  0.01090186          1593
## 11 1.00  0.00000000          1072
#probably worth doing some cursory filtering for missing data
#calculate missingness per sample
mat<-extract.gt(vcf)
miss<-c()
for (i in 1:ncol(mat)){
  miss[i]<-sum(is.na(mat[,i]))
}
locs$miss<-miss
locs
##                id location    tiss miss
## 1      AmafinB646       CA  frozen  140
## 2      AmafinB714       CA  frozen  126
## 3  AmafinMLZ12743   Mexico toe pad 7229
## 4  AmafinMLZ25447   Mexico toe pad 6278
## 5  AmafinMLZ46005   Mexico toe pad 5542
## 6  AmafinMLZ54378   Mexico toe pad 5462
## 7      AmavirB564       CA  frozen  156
## 8      AmavirB645       CA  frozen  173
## 9      AmavirB715       CA  frozen  164
## 10     AmavirB716       CA  frozen 4292
## 11     AmavirB717       CA  frozen  170
## 12     AmavirB718       CA  frozen  266
## 13     AmavirB720       CA  frozen 4173
## 14     AmavirB721       CA  frozen  176
## 15     AmavirB724       CA  frozen  167
## 16     AmavirB725       CA  frozen  180
## 17     AmavirB726       CA  frozen 4281
## 18     AmavirB727       CA  frozen  162
## 19     AmavirB728       CA  frozen  183
## 20     AmavirB729       CA  frozen  169
## 21     AmavirB730       CA  frozen 4349
## 22     AmavirB731       CA  frozen  460
## 23     AmavirB734       CA  frozen  166
## 24     AmavirB755       CA  frozen 4659
## 25   AmavirLA7603       CA  frozen 4593
## 26   AmavirLA8540       CA  frozen  143
## 27 AmavirMLZ32264   Mexico toe pad 8309
## 28 AmavirMLZ39529   Mexico toe pad 8099
## 29 AmavirMLZ39531   Mexico toe pad 7867
## 30 AmavirMLZ48335   Mexico toe pad 6324

elevated number of missing genotypes in toepad samples, as expected

identify fixed differences

#remove loci that aren't biallelic
mat<-mat[nchar(vcf@fix[,5]) == 1,]
dim(mat)
## [1] 10841    30
#identify SNPs that are fixed away from LA
#conv.mat[1:5,1:5]
conv.mat<-mat
conv.mat[conv.mat == "0/0"]<-0
conv.mat[conv.mat == "0/1"]<-1
conv.mat[conv.mat == "1/1"]<-2
conv.mat<-as.data.frame(conv.mat)
#convert to numeric
for (i in 1:ncol(conv.mat)){
  conv.mat[,i]<-as.numeric(as.character(conv.mat[,i]))
}

#show colnames to verify you're subsetting correctly
#colnames(conv.mat) 
#calc AF
lilac.af<-(rowSums(conv.mat[,c(3:6)], na.rm=T)/(rowSums(is.na(conv.mat[,c(3:6)]) == FALSE)))/2
red.af<-(rowSums(conv.mat[,c(27:30)], na.rm=T)/(rowSums(is.na(conv.mat[,c(27:30)]) == FALSE)))/2

#find fixed SNPs
diff<-abs(lilac.af - red.af)
#how many SNPs are missing all 4 genotypes in one of the two populations
table(is.na(diff))
## 
## FALSE  TRUE 
##  4618  6223
#how many SNPs are fixed
table(is.na(diff) == FALSE & diff == 1)
## 
## FALSE  TRUE 
## 10422   419
#list some fixed SNPs
head(vcf@fix[,1][is.na(diff) == FALSE & diff == 1])
## [1] "uce-5661" "uce-7816" "uce-7816" "uce-6933" "uce-2714" "uce-4401"
#subsample original matrix to only fixed diff SNPs
gen.mat<-mat[is.na(diff) == FALSE & diff == 1,]
dim(gen.mat)
## [1] 419  30
#subsample matrix converted for AF calcs to only fixed SNPS
conv.mat<-conv.mat[is.na(diff) == FALSE & diff == 1,]
dim(conv.mat)
## [1] 419  30
#write a logical test to convert alleles so that a single number represents one parental ancestry
for (i in 1:nrow(gen.mat)){
  #if 1 is the red crowned allele (e.g. absent in lilacs 3-6)
  if((sum(conv.mat[i,c(3:6)], na.rm=T)/(sum(is.na(conv.mat[i,c(3:6)]) == FALSE)))/2 == 0){
    #swap all '0/0' cells with '2/2'
    gen.mat[i,][gen.mat[i,] == "0/0"]<-"2/2"
    #swap all '1/1' cells with '0/0'
    gen.mat[i,][gen.mat[i,] == "1/1"]<-"0/0"
    #finally convert all '2/2' cells (originally 0/0) into '1/1'
    gen.mat[i,][gen.mat[i,] == "2/2"]<-"1/1"
    #no need to touch hets
  }
}

#convert R class NAs to the string "NA/NA"
gen.mat[is.na(gen.mat) == TRUE]<-"NA/NA"
#if it worked correctly this should be only missing or '1/1'
table(gen.mat[,3])
## 
##   1/1 NA/NA 
##   207   212
table(gen.mat[,4])
## 
##   1/1 NA/NA 
##   292   127
table(gen.mat[,5])
## 
##   1/1 NA/NA 
##   344    75
table(gen.mat[,6])
## 
##   1/1 NA/NA 
##   325    94
#prepare data structures for introgress input
#make locus info df
locus.info<-data.frame(locus=vcf@fix[vcf@fix[,3] %in% rownames(gen.mat),1],
                       type=rep("C", times=nrow(gen.mat)),
                       lg=gsub("uce-","",vcf@fix[vcf@fix[,3] %in% rownames(gen.mat),1]),
                       marker.pos=gsub("SNP_","",rownames(gen.mat)))
#make linkage group numeric
locus.info$lg<-as.numeric(as.character(locus.info$lg))
locus.info$marker.pos<-as.numeric(as.character(locus.info$marker.pos))

#we now have a gt matrix in proper format for introgress
#convert genotype data into a matrix of allele counts
count.matrix<-prepare.data(admix.gen=gen.mat, loci.data=locus.info,
                           parental1="1",parental2="0", pop.id=F,
                           ind.id=F, fixed=T)
## prepare.data is working; this may take a moment
## Processing data for 30 individuals and 419 loci.
#estimate hybrid index values
hi.index.sim<-est.h(introgress.data=count.matrix,loci.data=locus.info,
                    fixed=T, p1.allele="1", p2.allele="0")
## est.h is working; this may take a few minutes
#plot introgress plots with all 419 fixed different SNPs
locus.info$locus<-rep("", times=nrow(locus.info))
#LociDataSim1$lg<-c(1:110)
mk.image(introgress.data=count.matrix, loci.data=locus.info,
         marker.order=NULL,hi.index=hi.index.sim, ylab.image="Individuals",
         xlab.h="Red-crowned ancestry", pdf=F,
         col.image=c("red","green","blue"))

all 419 fixed SNPs regardless of missing data

dev.off() #reset plot window
## null device 
##           1
#calculate mean heterozygosity
het<-calc.intersp.het(introgress.data=count.matrix)
#dev.off()
#make triangle plot
plot(x=hi.index.sim$h, y=het, bg=rgb(0,0,0,alpha=0.3), pch=21, cex=2, col="black",
     xlab="Hybrid Index", ylab="Interspecific heterozygosity",
     ylim=c(0,1))
segments(x0 =0, y0 =0, x1 =.5, y1 =1)
segments(x0 =1, y0 =0, x1 =.5, y1 =1)

filter the fixed differences to investigate role of missing genotypes in toepad samples

#calculate number of missing genotypes in our parental pops for each of the 419 fixed sites
lilac.miss<-c()
red.miss<-c()
for (i in 1:nrow(gen.mat)){
  lilac.miss[i]<-sum(gen.mat[i,3:6] == "NA/NA")
  red.miss[i]<-sum(gen.mat[i,27:30] == "NA/NA")
}

#filter gen.mat to have a minimum of 2 genotypes in both species to call a site fixed
gen.mat.filtered<-gen.mat[lilac.miss <= 2 & red.miss <= 2,]
dim(gen.mat.filtered) #214 SNPs left
## [1] 214  30
locus.info.filtered<-locus.info[lilac.miss <= 2 & red.miss <= 2,]
#revisualize to see if this affects downstream inference
#convert genotype data into a matrix of allele counts
count.matrix<-prepare.data(admix.gen=gen.mat.filtered, loci.data=locus.info.filtered,
                           parental1="1",parental2="0", pop.id=F,
                           ind.id=F, fixed=T)
## prepare.data is working; this may take a moment
## Processing data for 30 individuals and 214 loci.
#estimate hybrid index values
hi.index.sim<-est.h(introgress.data=count.matrix,loci.data=locus.info.filtered,
                    fixed=T, p1.allele="1", p2.allele="0")
## est.h is working; this may take a few minutes
#plot
mk.image(introgress.data=count.matrix, loci.data=locus.info.filtered,
         marker.order=NULL,hi.index=hi.index.sim, ylab.image="Individuals",
         xlab.h="Red-crowned ancestry", pdf=F,
         col.image=c("red","green","blue"))

Thisplot shows 214 fixed SNPs with 50% missing data cutoff in both parental pops. This looks much better. Individuals 1-4 are Lilacs from Mexico, 5&6 are lilacs from LA, 7-9 are apparent hybrids, 10-26 are red crowns from LA, 27:30 are red crowns from Mexico. White bars are UCE loci. UCE 7322 is fascinating. Every single bird we sequenced from LA (except a single het) is homozygous lilac for the fixed for SNP 489. Strong evidence for adaptive introgression of this locus. Definitely worth mining out the full sequence of this UCE and mapping it to zebra finch genome to investigate putative function

dev.off() #reset plot window
## null device 
##           1
#calculate mean heterozygosity
het<-calc.intersp.het(introgress.data=count.matrix)
#dev.off()
#make triangle plot
plot(x=hi.index.sim$h, y=het, bg=rgb(0,0,0,alpha=0.3), pch=21, cex=2, col="black",
     xlab="Hybrid Index", ylab="Interspecific heterozygosity",
     ylim=c(0,1))
segments(x0 =0, y0 =0, x1 =.5, y1 =1)
segments(x0 =1, y0 =0, x1 =.5, y1 =1)

##filter gen.mat to allow no missing data in either species to call a site fixed
##assess downstream effects
#gen.mat.filtered<-gen.mat[lilac.miss == 0 & red.miss == 0,]
#dim(gen.mat.filtered) #214 SNPs left
#locus.info.filtered<-locus.info[lilac.miss == 0 & red.miss == 0,]
#
##revisualize to see if this affects downstream inference
##convert genotype data into a matrix of allele counts
#count.matrix<-prepare.data(admix.gen=gen.mat.filtered, loci.data=locus.info.filtered,
#                           parental1="1",parental2="0", pop.id=F,
#                           ind.id=F, fixed=T)
#
##estimate hybrid index values
#hi.index.sim<-est.h(introgress.data=count.matrix,loci.data=locus.info.filtered,
#                    fixed=T, p1.allele="1", p2.allele="0")
#
##plot
#mk.image(introgress.data=count.matrix, loci.data=locus.info.filtered,
#         marker.order=NULL,hi.index=hi.index.sim, ylab.image="Individuals",
#         xlab.h="Red-crowned ancestry", pdf=F,
#         col.image=c("red","green","blue"))

This looks cleaner, but I actually think we lose a lot of the signal showing just how messy this is. This reduced dataset makes it seem like almost all of the introgressed alleles are exlusively found as het sites, which didn't really seem to be the case when we allowed more missing data above.

dev.off() #reset plot window
## null device 
##           1
##calculate mean heterozygosity
#het<-calc.intersp.het(introgress.data=count.matrix)
##make triangle plot
#plot(x=hi.index.sim$h, y=het, bg=rgb(0,0,0,alpha=0.3), pch=21, cex=2, col="black",
#     xlab="Hybrid Index", ylab="Interspecific heterozygosity",
#     ylim=c(0,1))
#segments(x0 =0, y0 =0, x1 =.5, y1 =1)
#segments(x0 =1, y0 =0, x1 =.5, y1 =1)
#
##removing the missing data certainly seems to make our triangle plot cleaner though,
#so we just have to decide which trade off we want
#do genomic cline analysis
#give each SNP a unique identifier
locus.info.filtered$uce<-locus.info.filtered$locus
locus.info.filtered$locus<-locus.info.filtered$marker.pos

#test for cline neutrality at each SNP with 1000 replicates
cline.object<-genomic.clines(introgress.data=count.matrix, hi.index=hi.index.sim, loci.data=locus.info.filtered,
               sig.test = T, method = "parametric", n.reps = 1000, classification = T)
## genomic.clines is working; be patient, as this may take a while 
## begining analysis. 
## estimating genomic cline for: 30 
## estimating genomic cline for: 31 
## estimating genomic cline for: 37 
## estimating genomic cline for: 68 
## estimating genomic cline for: 123 
## estimating genomic cline for: 226 
## estimating genomic cline for: 245 
## estimating genomic cline for: 262 
## estimating genomic cline for: 390 
## estimating genomic cline for: 406 
## estimating genomic cline for: 489 
## estimating genomic cline for: 496 
## estimating genomic cline for: 600 
## estimating genomic cline for: 627 
## estimating genomic cline for: 644 
## estimating genomic cline for: 708 
## estimating genomic cline for: 748 
## estimating genomic cline for: 766 
## estimating genomic cline for: 785 
## estimating genomic cline for: 795 
## estimating genomic cline for: 821 
## estimating genomic cline for: 844 
## estimating genomic cline for: 859 
## estimating genomic cline for: 899 
## estimating genomic cline for: 946 
## estimating genomic cline for: 990 
## estimating genomic cline for: 1121 
## estimating genomic cline for: 1353 
## estimating genomic cline for: 1354 
## estimating genomic cline for: 1444 
## estimating genomic cline for: 1486 
## estimating genomic cline for: 1508 
## estimating genomic cline for: 1560 
## estimating genomic cline for: 1582 
## estimating genomic cline for: 1636 
## estimating genomic cline for: 1701 
## estimating genomic cline for: 1706 
## estimating genomic cline for: 1855 
## estimating genomic cline for: 1856 
## estimating genomic cline for: 1952 
## estimating genomic cline for: 2038 
## estimating genomic cline for: 2044 
## estimating genomic cline for: 2244 
## estimating genomic cline for: 2284 
## estimating genomic cline for: 2372 
## estimating genomic cline for: 2388 
## estimating genomic cline for: 2492 
## estimating genomic cline for: 2538 
## estimating genomic cline for: 2549 
## estimating genomic cline for: 2625 
## estimating genomic cline for: 2632 
## estimating genomic cline for: 2644 
## estimating genomic cline for: 2750 
## estimating genomic cline for: 2820 
## estimating genomic cline for: 2893 
## estimating genomic cline for: 3018 
## estimating genomic cline for: 3052 
## estimating genomic cline for: 3081 
## estimating genomic cline for: 3109 
## estimating genomic cline for: 3165 
## estimating genomic cline for: 3175 
## estimating genomic cline for: 3185 
## estimating genomic cline for: 3209 
## estimating genomic cline for: 3214 
## estimating genomic cline for: 3279 
## estimating genomic cline for: 3299 
## estimating genomic cline for: 3358 
## estimating genomic cline for: 3432 
## estimating genomic cline for: 3471 
## estimating genomic cline for: 3479 
## estimating genomic cline for: 3531 
## estimating genomic cline for: 3552 
## estimating genomic cline for: 3589 
## estimating genomic cline for: 3694 
## estimating genomic cline for: 3700 
## estimating genomic cline for: 3768 
## estimating genomic cline for: 3826 
## estimating genomic cline for: 3895 
## estimating genomic cline for: 3959 
## estimating genomic cline for: 3972 
## estimating genomic cline for: 4047 
## estimating genomic cline for: 4080 
## estimating genomic cline for: 4119 
## estimating genomic cline for: 4267 
## estimating genomic cline for: 4270 
## estimating genomic cline for: 4406 
## estimating genomic cline for: 4449 
## estimating genomic cline for: 4450 
## estimating genomic cline for: 4462 
## estimating genomic cline for: 4488 
## estimating genomic cline for: 4518 
## estimating genomic cline for: 4595 
## estimating genomic cline for: 4614 
## estimating genomic cline for: 4644 
## estimating genomic cline for: 4674 
## estimating genomic cline for: 4761 
## estimating genomic cline for: 4804 
## estimating genomic cline for: 4869 
## estimating genomic cline for: 4891 
## estimating genomic cline for: 4899 
## estimating genomic cline for: 4918 
## estimating genomic cline for: 4936 
## estimating genomic cline for: 5006 
## estimating genomic cline for: 5015 
## estimating genomic cline for: 5021 
## estimating genomic cline for: 5022 
## estimating genomic cline for: 5099 
## estimating genomic cline for: 5111 
## estimating genomic cline for: 5178 
## estimating genomic cline for: 5232 
## estimating genomic cline for: 5294 
## estimating genomic cline for: 5352 
## estimating genomic cline for: 5621 
## estimating genomic cline for: 5632 
## estimating genomic cline for: 5705 
## estimating genomic cline for: 5750 
## estimating genomic cline for: 5789 
## estimating genomic cline for: 5832 
## estimating genomic cline for: 5991 
## estimating genomic cline for: 5998 
## estimating genomic cline for: 6030 
## estimating genomic cline for: 6044 
## estimating genomic cline for: 6087 
## estimating genomic cline for: 6129 
## estimating genomic cline for: 6134 
## estimating genomic cline for: 6151 
## estimating genomic cline for: 6284 
## estimating genomic cline for: 6326 
## estimating genomic cline for: 6438 
## estimating genomic cline for: 6542 
## estimating genomic cline for: 6592 
## estimating genomic cline for: 6679 
## estimating genomic cline for: 6704 
## estimating genomic cline for: 6717 
## estimating genomic cline for: 6926 
## estimating genomic cline for: 6936 
## estimating genomic cline for: 6986 
## estimating genomic cline for: 7047 
## estimating genomic cline for: 7078 
## estimating genomic cline for: 7122 
## estimating genomic cline for: 7207 
## estimating genomic cline for: 7208 
## estimating genomic cline for: 7315 
## estimating genomic cline for: 7327 
## estimating genomic cline for: 7525 
## estimating genomic cline for: 7658 
## estimating genomic cline for: 7678 
## estimating genomic cline for: 7698 
## estimating genomic cline for: 7699 
## estimating genomic cline for: 7765 
## estimating genomic cline for: 7824 
## estimating genomic cline for: 7825 
## estimating genomic cline for: 7835 
## estimating genomic cline for: 7855 
## estimating genomic cline for: 7902 
## estimating genomic cline for: 8022 
## estimating genomic cline for: 8044 
## estimating genomic cline for: 8167 
## estimating genomic cline for: 8190 
## estimating genomic cline for: 8197 
## estimating genomic cline for: 8366 
## estimating genomic cline for: 8384 
## estimating genomic cline for: 8605 
## estimating genomic cline for: 8609 
## estimating genomic cline for: 8610 
## estimating genomic cline for: 8630 
## estimating genomic cline for: 8685 
## estimating genomic cline for: 8690 
## estimating genomic cline for: 8727 
## estimating genomic cline for: 8809 
## estimating genomic cline for: 8840 
## estimating genomic cline for: 8901 
## estimating genomic cline for: 8982 
## estimating genomic cline for: 9008 
## estimating genomic cline for: 9035 
## estimating genomic cline for: 9075 
## estimating genomic cline for: 9157 
## estimating genomic cline for: 9257 
## estimating genomic cline for: 9335 
## estimating genomic cline for: 9344 
## estimating genomic cline for: 9374 
## estimating genomic cline for: 9525 
## estimating genomic cline for: 9538 
## estimating genomic cline for: 9547 
## estimating genomic cline for: 9549 
## estimating genomic cline for: 9565 
## estimating genomic cline for: 9789 
## estimating genomic cline for: 9871 
## estimating genomic cline for: 9878 
## estimating genomic cline for: 9958 
## estimating genomic cline for: 9972 
## estimating genomic cline for: 9994 
## estimating genomic cline for: 10156 
## estimating genomic cline for: 10218 
## estimating genomic cline for: 10272 
## estimating genomic cline for: 10289 
## estimating genomic cline for: 10300 
## estimating genomic cline for: 10329 
## estimating genomic cline for: 10355 
## estimating genomic cline for: 10367 
## estimating genomic cline for: 10393 
## estimating genomic cline for: 10394 
## estimating genomic cline for: 10419 
## estimating genomic cline for: 10432 
## estimating genomic cline for: 10433 
## estimating genomic cline for: 10482 
## estimating genomic cline for: 10496 
## estimating genomic cline for: 10567 
## estimating genomic cline for: 10705 
## estimating genomic cline for: 10716 
## estimating genomic cline for: 10792 
## estimating genomic cline for: 10856 
## estimating genomic cline for: 10859 
## estimating genomic cline for: 10860 
## genomic clines analysis complete!
#we have 19 snps significantly non-neutral at a p value of 0
nrow(cline.object$Summary.data[cline.object$Summary.data[,4] == 0,])
## [1] 24
#because of our skewed sample size in LA between RC and LC, we have more statistical power to detect
#introgression from LC -> RC than vice versa. For this reason only look at significant LC->RC outliers.
cline.object$Summary.data[cline.object$Quantiles[,1] == 1 & cline.object$Summary.data[,4] == 0,]
##       locus type lnL ratio P-value
##  [1,]    30    1 32.118257       0
##  [2,]   489    1 94.411768       0
##  [3,]  2492    1 12.911472       0
##  [4,]  3165    1 42.875520       0
##  [5,]  3432    1 15.975614       0
##  [6,]  4449    1 28.572559       0
##  [7,]  4488    1 16.316756       0
##  [8,]  4674    1 16.010809       0
##  [9,]  5022    1 52.717615       0
## [10,]  6936    1 27.729711       0
## [11,]  8197    1  7.959912       0
#there are 10

#plot each cline
clines.plot(cline.data = cline.object, pdf = F, quantiles = T, cd=c("LC","het","RC"))

#reset plot window
dev.off()
## null device 
##           1

Because of our skewed sample size in LA between RC and LC, we have more statistical power to detect introgression from LC -> RC than vice versa. For this reason we highlight significant LC->RC outliers.

#plot all the clines overlayed, and color the LC->RC significant outliers in red
cline.data<-cline.object
## homozygote
plot(0:1,0:1,type="n",xlab="Hybrid index",ylab="LC allele freq")
n.loci<-dim(cline.data$Loci.data)[1]
## Get needed values
hi<-cline.data$hybrid.index
ub<-cline.data$Neutral.AA[[1]][1,]
lb<-cline.data$Neutral.AA[[2]][1,]

## Plot neutral cline
polygon.data.matrix<-rbind(hi,lb,ub,lb,ub)[,order(hi)]
polygon.data.matrix.rev<-rbind(hi,lb,ub,lb,ub)[,order(-hi)]
polygon(c(polygon.data.matrix[1,],polygon.data.matrix.rev[1,]),
        c(polygon.data.matrix[2,],polygon.data.matrix.rev[3,]), col="grey",border=NA)

## Plot observed Clines
for(i in 1:n.loci){
  AA.line<-cline.data$Fitted.AA[i,]
  line.matrix<-rbind(hi,AA.line)[,order(hi)]
  lines(line.matrix[1,],line.matrix[2,],lty=1) 
}
#plot significant outliers
for(i in c(1:n.loci)[cline.object$Quantiles[,1] == 1 & cline.object$Summary.data[,4] == 0]){
  AA.line<-cline.data$Fitted.AA[i,]
  line.matrix<-rbind(hi,AA.line)[,order(hi)]
  lines(line.matrix[1,],line.matrix[2,],lty=1, col="red") 
}

plot of all the clines overlayed, with significant LC->RC significant outliers in red